# Table 2 and 3
# Figure 2
# ===============

# Prepare data for analysis

# keep only observations for which all gender information
tomodel_rec <- reviewer_data %>% 
         group_by(manuscript_id) %>% 
         mutate(n_miss_fem =  sum(is.na(r_gender) == TRUE)) %>% 
         ungroup() %>%
         filter(n_miss_fem == 0 & is.na(gender_type3) == FALSE)

# Calculate weights        
tomodel_rec <- tomodel_rec %>% 
        group_by(manuscript_id) %>% 
        mutate(
                 n_fem =  sum(fem_r)
                , n_male = sum(male_r)
                , n_referees_weighted = n_fem + n_male
                , weight_n = 1/n_referees_weighted
        ) %>%
        filter(n_referees >1)

# Recommendation
# =====================
mRec1 <- felm(pos_rec ~ fem_r | as.factor(manuscript_id), data = tomodel_rec, weights = tomodel_rec$weight_n)

mRec2 <- felm(pos_rec ~ (fem_r*gender_type3)-gender_type3 | as.factor(manuscript_id), data = tomodel_rec, weights = tomodel_rec$weight_n)

avg_pos_rec <- mean(tomodel_rec$pos_rec, na.rm = T)

# Review Duration
# =====================
mDur1 <- felm(duration ~ fem_r | as.factor(manuscript_id), data = tomodel_rec, weights = tomodel_rec$weight_n)

mDur2 <- felm(duration ~ (fem_r*gender_type3)-gender_type3 | as.factor(manuscript_id), data = tomodel_rec, weights = tomodel_rec$weight_n)

avg_dur <- mean(tomodel_rec$duration, na.rm = T)

# ====================
# Text Analysis
# ====================
tomodel_text <- tomodel_rec %>%
        filter(missing_comment == 0 & missing_sentiment == 0)

# Sentiment
# =====================
mSent1 <- felm(sentimentscore ~ fem_r | as.factor(manuscript_id), data = tomodel_text, weights = tomodel_text$weight_n)

mSent2 <- felm(sentimentscore ~ (fem_r*gender_type3)-gender_type3 | as.factor(manuscript_id), data = tomodel_text, weights = tomodel_text$weight_n)

avg_sent <- mean(tomodel_text$sentimentscore, na.rm = T)

# Wordcount
# =====================
mWordcount1 <- felm(wordcount_adj ~ fem_r | as.factor(manuscript_id), data = tomodel_text, weights = tomodel_text$weight_n)

mWordcount2 <- felm(wordcount_adj ~ (fem_r*gender_type3)-gender_type3 | as.factor(manuscript_id), data = tomodel_text, weights = tomodel_text$weight_n)

avg_wordcount <- mean(tomodel_text$wordcount_adj, na.rm = T)

# ======================
# MAIN OUTPUT TABLES
# ======================

# Table 2
texreg(list(mRec1, mWordcount1, mSent1, mDur1)
       , custom.model.names = c("Non-reject", "Review Length", "Sentiment", "Duration")
       , custom.coef.names = c("Female Reviewer")
       , caption.above = TRUE
       , caption = "OLS regression results: Gender reviewing feedback"
       , file = here("02_output", "01_tables", "table2.tex"))


# Table 3
texreg(list(mRec2, mWordcount2, mSent2, mDur2)
       , custom.model.names = c("Non-reject", "Review Length", "Sentiment", "Duration")
       , custom.coef.names = c("Female Reviewer"
                               , "Male * Female Reviewer", "Female * Female Reviewer")
       , caption.above = TRUE
       , caption = "OLS regression results: Gender reviewing feedback and authorship type"
       , file = here("02_output", "01_tables", "table3.tex"))


# =========================
# Figure 2
# =========================

# extract coefficient and variance-covariance matrix
models <- list(mRec2, mWordcount2, mSent2, mDur2)
modelnames <- c("recommendation", "wordcount", "sentiment", "duration")

# Calculate marginal effects 
# (incl. standard errors based on Brambor et al. 2006)

to_plot <- NULL

# Loop over regression models
for(m in 1:length(models)){
        beta.hat <- coef(models[[m]]) 
        cov <- vcov(models[[m]])
        
        z0 <- c(0,1)
        
        var <- c("gender_type3Male", "gender_type3Female")
        
        toplot <- NULL
        plot.margins <- NULL
        for(i in var){
                
                var.i <- paste("fem_r:", i, sep = "")
                
                dy.dx <- beta.hat["fem_r"] + beta.hat[var.i]*z0
                
                se.dy.dx <- sqrt(cov["fem_r", "fem_r"] + z0^2*cov[var.i, var.i] + 2*z0*cov["fem_r", var.i])
                upr <- dy.dx + 1.96*se.dy.dx
                lwr <- dy.dx - 1.96*se.dy.dx
                
                toplot <- cbind(z0, dy.dx, upr, lwr) %>% data.frame() %>% mutate(var = var.i)
                plot.margins <- bind_rows(plot.margins, toplot)
        }
        
        plot.margins <- plot.margins %>% mutate(var = ifelse(z0 == 0, "fem_r:gender_type3Mixed Team", var)) %>% distinct() %>%
                mutate(var = factor(var, levels = c("fem_r:gender_type3Male", "fem_r:gender_type3Mixed Team", "fem_r:gender_type3Female")
                                    , labels = c( "Male", "Mixed","Female")
                )) 
        
        plot.margins <- plot.margins %>% mutate(model = modelnames[m])
        
        to_plot <- bind_rows(to_plot, plot.margins)
}

# Plot
to_plot <- to_plot %>% mutate(model = factor(model, levels = c("recommendation", "wordcount", "sentiment", "duration")
                                             , labels = c("Non-Reject Recommendation", "Length", "Sentiment", "Duration")))

p <- ggplot(to_plot, aes(x = var, y = dy.dx, group = 1)) + 
        geom_point(size = 3) + 
        geom_linerange(aes(ymin = lwr, ymax = upr)) + 
        theme_minimal() +
        theme(axis.text.x = element_text(size=20), text = element_text(size=20)) +
        geom_hline(yintercept = 0, col = "red", lty = 2) + 
        labs(x = "", y = "average marginal effect") + 
        facet_wrap(~model, ncol = 2, scales = "free_y")

# save

# as PDF
ggsave(plot = p, here("02_output", "02_figures", "figure2.pdf") , width = 12, height = 8)

# as TIFF
ggsave(plot = p, here("02_output", "02_figures", "figure2.tiff") 
       , width = 12, height = 8, dpi = 300, device = "tiff")
